

# Econ well being in Lebanon ----------------------------------------------------
long$head_work_legal %>% table(useNA = "always")
long$head_work_hours_4 %>% table(useNA = "always")
long$head_income %>% table(useNA = "always")
long$hh_inc_source_aid %>% table(useNA = "always")
long$aid_atm %>% table(useNA = "always")
long$aid_wfp %>% table(useNA = "always")
df$aid_wfp %>% table(useNA = "always")
long$hh_leb_2011 %>% table(useNA = "always")
long$hh_inc_stable %>% table(useNA = "always")
long$aid_chg %>% table(useNA = "always")
long$own_items_1 %>% table(useNA = "always")
long$own_items_2 %>% table(useNA = "always")
long$own_items_3 %>% table(useNA = "always")
long$own_items_4 %>% table(useNA = "always")
long$own_items_5 %>% table(useNA = "always")
long$own_items_6 %>% table(useNA = "always")
long$own_items_7 %>% table(useNA = "always")
long$own_items_8 %>% table(useNA = "always")
long$own_items_9 %>% table(useNA = "always")
long$own_items_10 %>% table(useNA = "always")
long$own_items_11 %>% table(useNA = "always")
long$expenses_ability %>% table(useNA = "always")
long$assets_value %>% table(useNA = "always")
long$hh_income %>% table(useNA = "always")

econ_well_being_leb = c("head_work_legal", "head_work_days_4", "head_work_hours_4", "head_income",
                        "hh_inc_source_aid", "aid_atm", "aid_wfp", "hh_leb_2011", "hh_inc_stable", 
                        "aid_chg", "own_items_1", "own_items_2", "own_items_3", "own_items_4", "own_items_5",
                        "own_items_6", "own_items_7", "own_items_8", "own_items_10", "own_items_11",
                        "expenses_ability", "assets_value", "assets_months", "hh_income")

long[long$.imp > 0,econ_well_being_leb] %>% summary

m = 10

data_pc = matrix(nrow = length(econ_well_being_leb), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_econ_leb[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, econ_well_being_leb], cor_type = "pear")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, econ_well_being_leb], cor_type = "pear")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, econ_well_being_leb], cor_type = "pear")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_econ_leb_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, econ_well_being_leb], cor_type = "pear")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_econ_leb_variance", i, ".csv"))
  
  W = mixedCor(long[long$.imp == i, econ_well_being_leb], c = 1:24)$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Economic well-being in Lebanon") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_econ_leb.pdf", height = 4, width = 5)


# Social wellbeing in Lebanon ----------------------------------------------------
long$intg_connected %>% table(useNA = "always")
long$intg_outsider %>% table(useNA = "always")
long$enum_arabic %>% table(useNA = "always")
long$head_literate %>% table(useNA = "always")
long$intg_politics %>% table(useNA = "always")
long$intg_discuss %>% table(useNA = "always")
long$meal_share %>% table(useNA = "always")
long$intg_access_doctor %>% table(useNA = "always")
long$intg_access_job %>% table(useNA = "always")
long$length_stay %>% table(useNA = "always")

long$ipl_connected = 6 - long$intg_connected #reversing order of variable
long$ipl_outsider = 6 - long$intg_outsider #reversing order of variable
long$ipl_arabic = 4 - long$enum_arabic #reversing order of variable
long$ipl_literate = 4 - long$head_literate #reversing order of variable
long$ipl_politics = 6 - long$intg_politics #reversing order of variable
long$ipl_discuss = long$intg_discuss
long$ipl_meal = long$meal_share
long$ipl_lebanese = long$lebanese_contacts
long$ipl_doctor = 6 - long$intg_access_doctor #reversing order of variable
long$ipl_job = 6 - long$intg_access_job #reversing order of variable
long$ipl_syrian = long$syrian_contacts
long$length_stay %>% table(useNA = "always")

long = long %>% 
  group_by(.imp) %>% 
  mutate(length_stay2 = cut(length_stay, breaks = c(quantile(length_stay, na.rm = T)), include.lowest = T),
         length_stay2 = as.numeric(length_stay2)) %>% 
  ungroup()

long$length_stay = long$length_stay2
long$length_stay2 = NULL

long$curfews_never %>% table(useNA = "always")
long$no_curfews_now %>% table(useNA = "always")
long$public_treat_person %>% table(useNA = "always")

long$ind_public_treat = long$public_treat_person
long$ind_public_treat = 5 - long$ind_public_treat #reverse coding 
long$public_treat_person = NULL

long$auth_treat_person %>% table(useNA = "always")

long$ind_auth_treat = long$auth_treat_person
long$ind_auth_treat = 5 - long$ind_auth_treat #reverse coding 

long$not_detained %>% table(useNA = "always")
long$not_detained = long$detained
long$not_detained = 1 - long$not_detained #reverse coding
long$detained = NULL

long$no_disc_housing = long$disc_housing #Does not need reverse coding
long$mobility %>% table(useNA = "always")
long$disc_housing = NULL

long$ind_mobility = long$mobility
long$ind_mobility = 5 - long$ind_mobility #reverse coding

long$ind_mobility_hh = long$mobility_hh
long$ind_mobility_hh = 4 - long$ind_mobility_hh
long$mobility_hh = NULL

#Note: removed "ipl_meal", "ipl_lebanese" because they're in networks
soc_well_being_leb = c("ipl_connected", "ipl_outsider", "ipl_arabic", "ipl_literate", "ipl_politics", "ipl_discuss", 
                       "ipl_job", "length_stay", "curfews_never", 
                       "no_curfews_now", "ind_public_treat", "ind_auth_treat", "not_detained", "no_disc_housing", 
                       "ind_mobility", "ind_mobility_hh")

long[long$.imp > 0,soc_well_being_leb] %>% summary

data_pc = matrix(nrow = length(soc_well_being_leb), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_soc_leb[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, soc_well_being_leb], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, soc_well_being_leb], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, soc_well_being_leb], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_soc_leb_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, soc_well_being_leb], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_soc_leb_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, soc_well_being_leb])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Social well-being in Lebanon") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_soc_leb.pdf", height = 4, width = 5)



# H3 services in Lebanon --------------------------------------------------
long$head_sick %>% table(useNA = "always")
long$head_not_sick = 1 - long$head_sick #reverse order 
long$head_sick = NULL

long$head_treated %>% table(useNA = "always")
long$disc_healthcare %>% table(useNA = "always")

long$towns_leb %>% table(useNA = "always")
quantile(df$towns_leb, na.rm = T)
long$stability_towns = long$towns_leb
long$stability_towns = with(long, ifelse(stability_towns == 1, 3,
                                         ifelse(stability_towns == 2, 2,
                                                ifelse(stability_towns > 2, 1, stability_towns))))
long$towns_leb = NULL

long$stability_current %>% table(useNA = "always")

long = long %>% 
  group_by(.imp) %>% 
  mutate(stability_current = cut(stability_current, breaks = quantile(stability_current, na.rm = T), include.lowest = T),
         stability_current = as.numeric(stability_current)) %>% 
  ungroup()

long$intg_access_legal %>% table(useNA = "always")
long$access_legal = 6 - long$intg_access_legal #reverse coding

long$person_nosick %>% table(useNA = "always")
long$person_treat %>% table(useNA = "always")

long$no_need_school %>% table(useNA = "always")
long$scl_not_preventive %>% table(useNA = "always")

services_lebanon = c("head_not_sick", "head_treated", "person_nosick", "person_treat", 
                     "ipl_doctor", "disc_healthcare", "no_need_school", 
                     "stability_towns", "stability_current", "scl_not_preventive", "access_legal", "own_items_9")
summary(long[long$.imp > 0,services_lebanon])

data_pc = matrix(nrow = length(services_lebanon), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_services_leb[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, services_lebanon], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, services_lebanon], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, services_lebanon], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_services_leb_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, services_lebanon], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_services_leb_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, services_lebanon])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Services in Lebanon") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_services_leb.pdf", height = 4, width = 5)



# H4: Legal well-being ----------------------------------------------------
long$resident %>% table(useNA = "always")
long$registered %>% table(useNA = "always")

legal_lebanon = c("registered", "resident")
long[long$.imp>0,legal_lebanon] %>% summary

data_pc = matrix(nrow = length(legal_lebanon), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_legal_leb[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, legal_lebanon], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, legal_lebanon], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, legal_lebanon], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_legal_leb_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, legal_lebanon], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_legal_leb_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, legal_lebanon])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Legal sitaution in Lebanon") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_legal_leb.pdf", height = 4, width = 5)

# H5 Syria safety ---------------------------------------------------------
long$risk_urban %>% table(useNA = "always")
long$safety_current = long$risk_urban
long$risk_urban = NULL

long$protests %>% table(useNA = "always")
long$head_violence %>% table(useNA = "always")

long$exposure_violence = long$head_violence
long$head_violence = NULL 

long$conscription %>% table(useNA = "always")
long$safe_future = long$safety_chg
long$safe_future = 5 - long$safe_future #reverse coding
long$safety_chg = NULL

long$info_trust_arabiya %>% table(useNA = "always")
long$info_trust_jazeera %>% table(useNA = "always")
long$info_trust_mayadeen %>% table(useNA = "always")
long$info_trust_manar %>% table(useNA = "always")

long$info_oppsn = rowSums(long[, c("info_trust_arabiya", "info_trust_jazeera")], na.rm = T)/2
long$info_regime = rowSums(long[, c("info_trust_mayadeen", "info_trust_manar")], na.rm = T)/2

long$oppsn_sympathy = ifelse(long$info_oppsn > long$info_regime, 1, 0)

control_syria = c("control_now_regime", "control_now_oppsn", "control_now_russia", "control_now_turkey",
                  "control_now_kurds", "contested_now", "control_past_oppsn", "control_past_russia", 
                  "control_past_regime", "control_past_kurds", "control_past_turkey", "contested_past", "isis_control")

safety_syria = c("safety_current", "oppsn_sympathy", "protests", "exposure_violence",
                 "safe_future", "conscription")

long[long$.imp>0,safety_syria] %>% summary
long[long$.imp>0,control_syria] %>% summary

data_pc = matrix(nrow = length(safety_syria), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_safety_syria[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, safety_syria], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, safety_syria], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, safety_syria], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_safety_syria_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, safety_syria], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_safety_syria_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, safety_syria])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Safety in Syria") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_safety_syr.pdf", height = 4, width = 5)


data_pc = matrix(nrow = length(control_syria), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_control_syria[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, control_syria], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, control_syria], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, control_syria], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_control_syria_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, control_syria], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_control_syria_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, control_syria])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Control in Syria") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_control_syr.pdf", height = 4, width = 5)

long$index_regime_control = - long$index_control_syria #Reverse coding to make regime control positive
long$index_control_syria = NULL

# H6 Econ Syria --------------------------------------------------------------
long$jobs_origin %>% table(useNA = "always")
long$syr_debt %>% table(useNA = "always")

long$debt_fin = NA
long$debt_fin[long$syr_debt == 0] = 0
long$debt_fin[long$syr_debt %in% c(1,2)] = 1
long$debt_fin[long$syr_debt > 2] = 2

long$debt_fin %>% table(useNA = "always")
long$syr_debt = NULL

long$syr_property_1 %>% table(useNA = "always")
long$syr_property_2 %>% table(useNA = "always")
long$syr_property_3 %>% table(useNA = "always")

long$syr_land_future %>% table(useNA = "always")
long$syr_home_doc %>% table(useNA = "always")

econ_syria = c("jobs_origin", "debt_fin", "syr_property_1", "syr_property_2", "syr_property_3", 
               "syr_land_future", "syr_home_doc")

summary(long[long$.imp > 0,econ_syria])

data_pc = matrix(nrow = length(econ_syria), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_econ_syria[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, econ_syria], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, econ_syria], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, econ_syria], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_econ_syria_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, econ_syria], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_econ_syria_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, econ_syria])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Econ situation in Syria") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_econ_syr.pdf", height = 4, width = 5)


# Services Syria ----------------------------------------------------------
long$elect_origin %>% table(useNA = "always")
long$services_syr_elect = 6 - long$elect_origin
long$elect_origin = NULL

long$water_origin %>% table(useNA = "always")
long$services_syr_water = 6 - long$water_origin
long$water_origin = NULL

long$schools_origin %>% table(useNA = "always")
long$services_syr_scl = long$schools_origin
long$schools_origin = NULL

long$health_origin %>% table(useNA = "always")
long$services_syr_health = long$health_origin
long$health_origin = NULL

long$services_chg %>% table(useNA = "always")
long$services_syr_improve = 5 - long$services_chg
long$services_chg = NULL

services_syria = c("services_syr_elect", "services_syr_water", "services_syr_scl", 
                   "services_syr_health", "services_syr_improve")

summary(long[long$.imp > 0,services_syria])

data_pc = matrix(nrow = length(services_syria), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_services_syria[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, services_syria], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, services_syria], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, services_syria], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_services_syria_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, services_syria], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_services_syria_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, services_syria])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Services in Syria") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_services_syr.pdf", height = 4, width = 5)


# Networks Syria ----------------------------------------------------------------
long$syr_stay %>% table(useNA = "always")
long$relatives_rtrn1 %>% table(useNA = "always")
long$relatives_rtrn2 %>% table(useNA = "always")
long$hh_rtrn %>% table(useNA = "always")

networks_syria = c("syr_stay", "relatives_rtrn1", "relatives_rtrn2", "hh_rtrn")
summary(long[long$.imp > 0,networks_syria])

data_pc = matrix(nrow = length(networks_syria), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_networks_syria[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, networks_syria], cor_type = "pear")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, networks_syria], cor_type = "pear")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, networks_syria], cor_type = "pear")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_networks_syria_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, networks_syria], cor_type = "pear")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_networks_syria_variance", i, ".csv"))
  
  W = mixedCor(long[long$.imp == i, networks_syria])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Networks in Syria") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_networks_syr.pdf", height = 4, width = 5)


# Networks Lebanon --------------------------------------------------------
long$hh_syr_leb %>% table(useNA = "always")
long$leban_relatives %>% table(useNA = "always")

long$leban_relatives2 = recode(long$leban_relatives, "3" = 0, "2" = 1, "1" = 2, .default = NA_real_)
long$leban_relatives2 %>% table(useNA = "always")

networks_lebanon = c("hh_syr_leb", "ipl_lebanese", "ipl_syrian", "leban_relatives2", "ipl_meal")
summary(long[long$.imp>0,networks_lebanon])

data_pc = matrix(nrow = length(networks_lebanon), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_networks_lebanon[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, networks_lebanon], cor_type = "pear")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, networks_lebanon], cor_type = "pear")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, networks_lebanon], cor_type = "pear")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_networks_lebanon_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, networks_lebanon], cor_type = "pear")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_networks_lebanon_variance", i, ".csv"))
  
  W = mixedCor(long[long$.imp == i, networks_lebanon])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Networks in Lebanon") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_networks_leb.pdf", height = 4, width = 5)



# Information 9.1 ---------------------------------------------------------
long$know_safe %>% table(useNA = "always")
long$know_jobs %>% table(useNA = "always")
long$know_services %>% table(useNA = "always")
long$know_conscription %>% table(useNA = "always")

long$info_safe = ifelse(long$know_safe == 1, 1, ifelse(long$know_safe %in% c(2, 3), 0, NA))
long$info_jobs = ifelse(long$know_jobs == 1, 1, ifelse(long$know_jobs %in% c(2, 3), 0, NA))
long$info_services = ifelse(long$know_services == 1, 1, ifelse(long$know_services %in% c(2, 3), 0, NA))
long$info_conscription = ifelse(long$know_conscription == 1, 1, ifelse(long$know_conscription %in% c(2, 3), 0, NA))

long$info_safe %>% table(useNA = "always")
long$info_jobs %>% table(useNA = "always")
long$info_services %>% table(useNA = "always")
long$info_conscription %>% table(useNA = "always")

long$info_syr_rln %>% table(useNA = "always")

long$communicate_freq_urb %>% table
long$info_syr_communicate = long$communicate_freq_urb
long$info_syr_communicate = 7 - long$info_syr_communicate

info_quality = c("info_safe", "info_jobs", "info_services", "info_conscription", "info_syr_rln", "info_syr_communicate")
summary(long[long$.imp>0,info_quality])
summary(long[long$.imp==0,info_quality])

data_pc = matrix(nrow = length(info_quality), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$index_info_quality[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, info_quality], cor_type = "poly")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, info_quality], cor_type = "poly")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, info_quality], cor_type = "poly")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/index_info_quality_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, info_quality], cor_type = "poly")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/index_info_quality_variance", i, ".csv"))
  
  W = polychoric(long[long$.imp == i, info_quality])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Confidence in information") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_info_confidence.pdf", height = 4, width = 5)


# Mobility costs ----------------------------------------------------------
# switch source
mobility = read.csv("data/google_maps_travel_distance_output.csv", stringsAsFactors = F)

mobility = mobility %>% select(response_num, log_travel_distance) %>% rename(index_mobility = log_travel_distance)

long = long %>% left_join(mobility)

long$index_family = log(long$hh_size + 1) #When we asked for household size, we excluded the head of household

indices = long %>% 
  select(c(response_num, names(long)[str_detect(names(long), "^index")]))

indices %>% write.csv("out/indices.csv", row.names = F)

# Fixing outcomes -------------------------------------------------------
long$rtrn_head_12 = recode(long$rtrn_12mon, "1" = 1, "2" = 0, "3" = 0, .default = NA_real_)

long$rtrn_head_ever = recode(long$rtrn_ever, "1" = 1, "2" = 0, "3" = 0, .default = NA_real_)

long$rtrn_ever %>% table(useNA = "always")
long$rtrn_head_never = recode(long$rtrn_ever, "1" = 0, "2" = 0, "3" = 1, .default = NA_real_)

long$rtrn_head_never[long$rtrn_head_12==1] %>% table

long$rtrn_head_2years = long$expect2yr_syria

long$rtrn_resources %>% table(useNA = "always")
long$prep_resources = long$rtrn_resources
long$rtrn_resources = NULL

long$rtrn_docs %>% table(useNA = "always")
long$prep_docs = long$rtrn_docs
long$rtrn_docs = NULL

long$rtrn_author %>% table(useNA = "always")
long$prep_author = long$rtrn_author
long$rtrn_author = NULL

long$rtrn_unhcr %>% table(useNA = "always")
long$prep_unhcr = long$rtrn_unhcr
long$rtrn_unhcr = NULL

long$rtrn_scope %>% table(useNA = "always")
long$prep_scope = long$rtrn_scope
long$rtrn_scope = NULL

long$rtrn_abort %>% table(useNA = "always")
long$prep_abort = long$rtrn_abort
long$rtrn_abort = NULL

vars = c("prep_resources", "prep_docs", "prep_author", "prep_unhcr", "prep_scope", "prep_abort")
long$prep_resources %>% table(useNA = "always")
long$prep_docs %>% table(useNA = "always")
long$prep_author %>% table(useNA = "always")
long$prep_unhcr %>% table(useNA = "always")
long$prep_scope %>% table(useNA = "always")
long$prep_abort %>% table(useNA = "always")
long[long$.imp>0,vars] %>% summary

data_pc = matrix(nrow = length(vars), ncol = 11)
data_pc[,1] = 1:nrow(data_pc)

for(i in 1:m){
  long$prep_pca[long$.imp == i] = PCA_extract(vars = long[long$.imp == i, vars], cor_type = "pear")[[2]] %>% as.vector()
  names = PCA_extract(vars = long[long$.imp == i, vars], cor_type = "pear")[[1]]$loadings[,1] %>% names
  vals = PCA_extract(vars = long[long$.imp == i, vars], cor_type = "pear")[[1]]$loadings[,1] %>% round(digits = 2) %>% as.character()
  tibble(names, vals) %>% write.csv(paste0("out/mi_loadings/prep_pca_loadings", i, ".csv"), row.names = F)
  PCA_extract(vars = long[long$.imp == i, vars], cor_type = "pear")[[1]]$Vaccounted %>% round(2) %>% write.csv(paste0("out/mi_variance/prep_pca_variance", i, ".csv"))
  
  W = mixedCor(long[long$.imp == i, vars])$rho
  eigenvalues = principal(r = W, nfactors = ncol(W), rotate = "none")$values
  
  data_pc[, i+1] = eigenvalues
}

data_pc = data_pc %>% as_tibble()
names(data_pc) = c("pc", paste0("Imputation ", 1:10))
data_pc = data_pc %>% pivot_longer(cols = -pc)
data_pc$name = factor(data_pc$name, levels = paste0("Imputation ", 1:10))

data_pc %>% 
  ggplot(aes(x = pc, y = value, colour = name)) + 
  geom_line() +
  geom_point() + 
  geom_jitter(width = .1, height = .1) + 
  theme_few() + 
  scale_color_hc() +
  labs(x = "Principal Component", y = "Eigenvalue", subtitle = "Prepare to return") + 
  scale_x_continuous(breaks = 1:max(data_pc$pc)) + 
  theme(legend.position = "bottom", legend.title = element_blank(), 
        legend.text = element_text(size = 6), axis.text.x = element_text(size = 9))
ggsave("figures/appendix/scree_prep.pdf", height = 4, width = 5)


# Fixing covariates (control variables) -------------------------------------------------------
long$syr_origin %>% table(useNA = "always")
long$syr_origin %>% class

long$syr_origin_urban %>% table(useNA = "always")

long$head_educ %>% table(useNA = "always")

#Coding for finished secondary school
long$head_educ2 = recode(long$head_educ, "6" = 1, "5" = 1, "4" = 1, "3" = 0, "2" = 0, "1" = 0, .default = NA_real_)
long$head_educ = NULL

long$sick %>% table(useNA = "always")
long$toddler %>% table(useNA = "always")
long$elderly %>% table(useNA = "always")
long$single_parent %>% table(useNA = "always")
table(long$head_female, long$female_headed_hh)

long$female_headed_hh %>% table(useNA = "always")
long$head_female %>% table(useNA = "always")

long$syr_origin %>% table(useNA = "always")
long$syr_origin_urban %>% table(useNA = "always")
long$location_its %>% table(useNA = "always")
long$sick %>% table(useNA = "always")
long$head_educ2 %>% table(useNA = "always")
long$toddler %>% table(useNA = "always")
long$elderly %>% table(useNA = "always")
long$female_headed_hh %>% table(useNA = "always")
long$single_parent
long$single_parent_final %>% table(useNA = "always")
long$head_female %>% table(useNA = "always")
long$location_district %>% table(useNA = "always")
long[long$.imp>0,vars] %>% summary

hezb = readRDS("data/hezb_control.rds")
hezb$response_num %>% summary
hezb$hezb_control %>% table

long = long %>% left_join(hezb)
long$hezb_control %>% table

long$hezb_control %>% table(useNA = "always")

covars = c("syr_origin", "syr_origin_urban", "location_its", "sick", "head_educ2", "toddler", "elderly", "female_headed_hh", "location_district", "hezb_control")
long[long$.imp>0,covars] %>% summary


# Additive indices --------------------------------------------------------
long_additive = long
standardize = function(x){
  x = (x - mean(x)) / sd(x)
}


for(i in 1:m){
  #Econ Lebanon
  long_additive[long_additive$.imp == i, econ_well_being_leb] = long_additive[long_additive$.imp == i,econ_well_being_leb] %>% map_df(standardize)
  long_additive$additive_econ_leb[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i,econ_well_being_leb])
  long_additive$additive_econ_leb[long_additive$.imp == i] = long_additive$additive_econ_leb[long_additive$.imp == i] %>% standardize
  
  #Econ Lebanon
  #long_additive[long_additive$.imp == i, econ_well_being_leb] = long_additive[long_additive$.imp == i, econ_well_being_leb] %>% map_df(standardize)
  #long_additive$additive_econ_leb[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i ,econ_well_being_leb])
  #long_additive$additive_econ_leb[long_additive$.imp == i] = long_additive$additive_econ_leb[long_additive$.imp == i] %>% standardize
  
  #Social Lebanon
  long_additive[long_additive$.imp == i, soc_well_being_leb] = long_additive[long_additive$.imp == i, soc_well_being_leb] %>% map_df(standardize)
  long_additive$additive_soc_leb[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, soc_well_being_leb])
  long_additive$additive_soc_leb[long_additive$.imp == i] = long_additive$additive_soc_leb[long_additive$.imp == i] %>% standardize
  
  #Services Lebanon
  long_additive[long_additive$.imp == i, services_lebanon] = long_additive[long_additive$.imp == i, services_lebanon] %>% map_df(standardize)
  long_additive$additive_services_leb[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, services_lebanon])
  long_additive$additive_services_leb[long_additive$.imp == i] = long_additive$additive_services_leb[long_additive$.imp == i] %>% standardize
  
  #Legal Lebanon
  long_additive[long_additive$.imp == i, legal_lebanon] = long_additive[long_additive$.imp == i, legal_lebanon] %>% map_df(standardize)
  long_additive$additive_legal_leb[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, legal_lebanon])
  long_additive$additive_legal_leb[long_additive$.imp == i] = long_additive$additive_legal_leb[long_additive$.imp == i] %>% standardize
  
  #Networks Lebanon
  long_additive[long_additive$.imp == i, networks_lebanon] = long_additive[long_additive$.imp == i, networks_lebanon] %>% map_df(standardize)
  long_additive$additive_networks_lebanon[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, networks_lebanon])
  long_additive$additive_networks_lebanon[long_additive$.imp == i] = long_additive$additive_networks_lebanon[long_additive$.imp == i] %>% standardize
  
  #Safety Syria
  long_additive[long_additive$.imp == i, safety_syria] = long_additive[long_additive$.imp == i, safety_syria] %>% map_df(standardize)
  long_additive$additive_safety_syria[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, safety_syria])
  long_additive$additive_safety_syria[long_additive$.imp == i] = long_additive$additive_safety_syria[long_additive$.imp == i] %>% standardize
  
  #Econ Syria
  long_additive[long_additive$.imp == i, econ_syria] = long_additive[long_additive$.imp == i, econ_syria] %>% map_df(standardize)
  long_additive$additive_econ_syria[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, econ_syria])
  long_additive$additive_econ_syria[long_additive$.imp == i] = long_additive$additive_econ_syria[long_additive$.imp == i] %>% standardize
  
  #Services Syria
  long_additive[long_additive$.imp == i, services_syria] = long_additive[long_additive$.imp == i, services_syria] %>% map_df(standardize)
  long_additive$additive_services_syria[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, services_syria])
  long_additive$additive_services_syria[long_additive$.imp == i] = long_additive$additive_services_syria[long_additive$.imp == i] %>% standardize
  
  #Networks Syria
  long_additive[long_additive$.imp == i, networks_syria] = long_additive[long_additive$.imp == i, networks_syria] %>% map_df(standardize)
  long_additive$additive_networks_syria[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, networks_syria])
  long_additive$additive_networks_syria[long_additive$.imp == i] = long_additive$additive_networks_syria[long_additive$.imp == i] %>% standardize
  
  #Information Quality
  long_additive[long_additive$.imp == i, info_quality] = long_additive[long_additive$.imp == i, info_quality] %>% map_df(standardize)
  long_additive$additive_info_quality[long_additive$.imp == i] = rowSums(long_additive[long_additive$.imp == i, info_quality])
  long_additive$additive_info_quality[long_additive$.imp == i] = long_additive$additive_info_quality[long_additive$.imp == i] %>% standardize
  
}
long_additive$additive_info_quality %>% summary
long_additive$additive_mobility = long_additive$index_mobility
long_additive$additive_family = long_additive$index_family
#long_additive$additive_control_syria = NULL #Exluding  control bcz it doesnt make sense to have an additive index of this


long_additive = long_additive %>% select(.imp, response_num, names(long_additive)[str_detect(names(long_additive), "additive")])
